---
title: "Multi-Level Conceptual Replication of Fulmer et al. (2010): Models"
author: "Matthew Samson"
date: "19 January 2015"
output: html_document
---

Install and require Packages

```{r}

install.packages(c("psych","nlme", "reshape", "ICC", "plyr", "data.table"))

require("psych"); require("nlme"); require("reshape"); require("ICC"); require("plyr"); require("data.table")

```

Set Working Directory and load data.

Data from "ics2001" is described briefly in "Final Table Script". 

"bigfive" contains country-level extroversion scores and "collectivism" contains country level collectivism scores. These scores will be fit as a factor and covariate respectively in Model 1.

```{r, echo=TRUE}

#Set Working Directory (insert appropriate directory below)


#Load data
bigfive <- read.csv("1_Big Five By Country.csv")
collectivism <- read.csv("2_Invdividualism and Collectivism Scores by Country.csv")
ics2001 <- read.csv("3_International College Survey 2001 Final.csv")

```

Adjust for reverse coding and aggregate relevant variables in ics2001.

```{r}

#Reverse coding
ics2001$ext2 <- 6 - ics2001$ext2
ics2001$ext3 <- 6 - ics2001$ext3
ics2001$ext6 <- 6 - ics2001$ext6

#Average Extroversion
ics2001$ext_tot <- (ics2001$ext1 + ics2001$ext2 + ics2001$ext3 + ics2001$ext4 + ics2001$ext5 + ics2001$ext6)/6

#Average Subjective Well-Being
ics2001$swl_tot <- (ics2001$swl1 + ics2001$swl2 + ics2001$swl3 + ics2001$swl4 + ics2001$swl5)/5

#Average Collectivism (for Model 2)
ics2001$col_tot <- (ics2001$col1 + ics2001$col2 + ics2001$col3)/3

```

**Model 1: MLM with 26 Countries** 

Create TEMP file with relevant variables, without acquiescent respondents (denoted by the variable "acq") and only the countries that will be used in the analysis. 

```{r}

TEMP1 <- na.omit(ics2001[,c("acq", "country", "ext_tot", "swl_tot")])

TEMP1 <- subset(TEMP1, acq == "0")
TEMP1 <-subset(TEMP1, country == "Australia" | country == "Austria" | country == "Brazil" | country == "Canada" | country == "China" | country == "Germany" | country == "Hong Kong" | country == "India" | country == "Indonesia" | country == "Iran" | country == "Italy" | country == "Japan" | country == "Kuwait" | country == "Malaysia" | country == "Mexico" | country == "Nigeria" | country == "Philippines" | country == "Poland" | country == "Portugal" | country == "Russia" | country == "Slovenia" | country == "South Korea" | country == "Spain" | country == "Thailand" | country == "Turkey" | country == "USA")

```

Create group means and SD's for EXT and SWL. These are for the computation of z-scores by country, to facilitate the removal of outliers. The means produced here are NOT the means that are used for group mean centring of the models reported in the manuscript. 

```{r}

#Means
country.means <- data.table(TEMP1)
country.means <- data.frame(country.means[,list(country.ext_tot=mean(ext_tot,na.rm=T),country.swl_tot=mean(swl_tot,na.rm=T)),by=country])

#Rename
country.means <- rename(country.means, c("country.ext_tot"="EXT_M", "country.swl_tot"="SWL_M"))

#SD's
country.sds <- data.table(TEMP1)
country.sds <- data.frame(country.sds[,list(country.ext_tot=sd(ext_tot,na.rm=T),country.swl_tot=sd(swl_tot,na.rm=T)),by=country])

#Rename
country.sds <- rename(country.sds, c("country.ext_tot"="EXT_SD", "country.swl_tot"="SWL_SD"))

#Merge
country.means_sds <- merge(country.means,country.sds,by="country")
TEMP1 <- merge(TEMP1,country.means_sds,by="country")

```

Compute z-scores and then delete outliers. Z-scores reflect the deviation of each participants' scale score from the mean for that country. Outliers were those classified as +/- 3 SD from their country's mean for EXT and/or SWL.

```{r}

#Compute Z-Scores
TEMP1$z_ext <- (TEMP1$ext_tot-TEMP1$EXT_M)/TEMP1$EXT_SD
TEMP1$z_swl <- (TEMP1$swl_tot-TEMP1$SWL_M)/TEMP1$SWL_SD

#Delete outliers 
TEMP1$ext_tot[TEMP1$z_ext>3 & !is.na(TEMP1$ext_tot)] <- NA
TEMP1$ext_tot[TEMP1$z_ext<(-3) & !is.na(TEMP1$ext_tot)] <- NA
TEMP1$swl_tot[TEMP1$z_swl>3 & !is.na(TEMP1$swl_tot)] <- NA
TEMP1$swl_tot[TEMP1$z_swl<(-3) & !is.na(TEMP1$swl_tot)] <- NA

```

Now that outliers are removed, we need to calculate country means for extroversion again, and then group mean centre scores. 'Group mean centring' simply means that we subtract the average extroversion score for each country from all observations within that country. Using group mean centred predictors improves the robustness of the multilevel models we will fit subsequently. The centred extroversion scores we will use in our model is denoted by the variable label "ext_c".


```{r}

#Delete now irrelevant variables from data set (i.e. keep only relevant variables)
TEMP1 <- na.omit(TEMP1[,c("country", "ext_tot", "swl_tot")])

#Create new group means for extroversion to dataset
ext.means <- data.table(TEMP1)
ext.means <- data.frame(ext.means[,list(country.ext_tot=mean(ext_tot,na.rm=T)),by=country])

#Rename variable and merge
ext.means <- rename(ext.means, c("country.ext_tot"="EXT_M"))
TEMP1 <- merge(TEMP1, ext.means, by="country")

#Group centre extroversion
TEMP1$ext_c <- TEMP1$ext_tot - TEMP1$EXT_M

describe(TEMP1$EXT_M)

```

Now, we turn to extroversion scores from the big five. We want to create low, centred and high versions of these scores, for subsequent simple slopes analyses. 

```{r}

names(bigfive)

bigfive$c_extrov <- bigfive$extrov - mean(bigfive$extrov)
bigfive$low_extrov <- bigfive$c_extrov + sd(bigfive$extrov)
bigfive$high_extrov <- bigfive$c_extrov - sd(bigfive$extrov)

```


Merge country and individaul level datasets for MLM; rename variables; and create TEMP dataset for analysis. 

```{r}

country_scores <- merge(bigfive, collectivism, by="country")
totaldata <- merge(country_scores, TEMP1, by="country")

totaldata <- rename(totaldata, c("extrov"="country_ext","SOC.PRAC.Collectivism..GLOBE."="globe_coll"))

TEMP2 <- na.omit(totaldata[,c("country", "ext_c", "swl_tot", "country_ext", "globe_coll")])

#Create data file for simple slopes analysis
TEMP3 <- na.omit(totaldata[,c("country", "ext_c", "swl_tot", "c_extrov", "low_extrov", "high_extrov", "globe_coll")])

```

MLM Analysis and Confidence Interval for Interaction Slope estimate

"ext_c" is specified as a fully random effect; there is a compound symmetry correlation structure; and, restricted maximum liklihood estimation is used. 

```{r}

model1 <- lme(swl_tot ~ globe_coll + ext_c*country_ext, data=TEMP2, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model1)

#Interaction Lower bound CI
0.019914 - (1.96*0.0080435)
#Interaction Upper bound CI
0.019914 + (1.96*0.0080435)

#Positive correlation but negative coefficient in MLM model. 
cor.test(TEMP2$ext_c, TEMP2$swl_tot)

```

**Model 2: MLM with 46 Countries**

Create another TEMP file, with variables relevant to Model 2; remove acquiescent respondents; and, omit observations from Egypt and Zimbabwe due to anomalous responding (more info in the mansuscript). 

```{r}

TEMP3 <- na.omit(ics2001[,c("acq", "country", "ext_tot", "col_tot", "swl_tot")])

TEMP3 <- subset(TEMP3, acq == "0")
TEMP3 <-subset(TEMP3, !country == "Egypt")
TEMP3 <-subset(TEMP3, !country == "Zimbabwe")

```

Create group means and SD's for EXT, SWL and COL. Again, this is to facilitate the removal of outliers. Group means will be calculated again subsequently, for inclusion in the final model. 

```{r}

#Means
country.means1 <- data.table(TEMP3)
country.means1 <- data.frame(country.means1[,list(country.ext_tot=mean(ext_tot,na.rm=T), country.col_tot=mean(col_tot,na.rm=T), country.swl_tot=mean(swl_tot,na.rm=T)), by=country])

#Rename Variables
country.means1 <- rename(country.means1, c("country.ext_tot"="EXT_M", "country.col_tot"="COL_M", "country.swl_tot"="SWL_M"))

#SD's
country.sds1 <- data.table(TEMP3)
country.sds1 <- data.frame(country.sds1[,list(country.ext_tot=sd(ext_tot,na.rm=T),country.swl_tot=sd(swl_tot,na.rm=T), country.col_tot=sd(col_tot,na.rm=T)),by=country])

names(country.sds1)

#Rename Variables
country.sds1 <- rename(country.sds1, c("country.ext_tot"="EXT_SD", "country.swl_tot"="SWL_SD", "country.col_tot"="COL_SD"))

#Merge Means and SD's
country.means_sds1 <- merge(country.means1,country.sds1,by="country")
TEMP3 <- merge(TEMP3,country.means_sds1,by="country")

```

Compute z-scores and delete outliers, as per Model 1.

Note, we have to do this again for Model 2 because we are using a different subset of participants from the ics2001. 

```{r}

#Compute Z-Scores
TEMP3$z_ext <- (TEMP3$ext_tot-TEMP3$EXT_M)/TEMP3$EXT_SD
TEMP3$z_swl <- (TEMP3$swl_tot-TEMP3$SWL_M)/TEMP3$SWL_SD

#Delete outliers 
TEMP3$ext_tot[TEMP3$z_ext>3 & !is.na(TEMP3$ext_tot)] <- NA
TEMP3$ext_tot[TEMP3$z_ext<(-3) & !is.na(TEMP3$ext_tot)] <- NA
TEMP3$swl_tot[TEMP3$z_swl>3 & !is.na(TEMP3$swl_tot)] <- NA
TEMP3$swl_tot[TEMP3$z_swl<(-3) & !is.na(TEMP3$swl_tot)] <- NA

```

Create group means for Extroversion and Collectivism again, this time without outliers. This is as per Model 1, only this time these variables will comprise the Level 2 factors we actually fit in the final model. 

```{r}

#Delete now irrelevant variables from data set
TEMP3 <- na.omit(TEMP3[,c("country", "ext_tot", "col_tot", "swl_tot")])

#Create new group means for extroversion to dataset
ext.col.swl.means <- data.table(TEMP3)
ext.col.swl.means <- data.frame(ext.col.swl.means[,list(country.ext_tot=mean(ext_tot,na.rm=T), country.col_tot=mean(col_tot,na.rm=T), country.swl_tot=mean(swl_tot,na.rm=T)), by=country])

#Rename variable
ext.col.swl.means <- rename(ext.col.swl.means, c("country.ext_tot"="EXT_M", "country.col_tot"="COL_M", "country.swl_tot"="SWL_M"))

#Merge dataset
TEMP3 <- merge(TEMP3, ext.col.swl.means, by="country")

```

Group mean centre extroversion (using means without outliers). As per Model 1. 

```{r}

TEMP3$ext_c <- TEMP3$ext_tot - TEMP3$EXT_M

```

Model 2 MLM Analysis. As per the specifications in Model 1, only with more countries included in the analysis, and different Level 2 Extroversion and Collectivism variables. 

```{r}

model2 <- lme(swl_tot ~ COL_M + ext_c*EXT_M, data=TEMP3, random = ~ 1 + ext_c|country, correlation=corCompSymm(), method="REML", na.action=na.omit)
summary(model2)

#Interaction Lower bound CI
0.2461138 - (1.96*0.1289389)
#Interaction Upper bound CI
0.2461138 + (1.96*0.1289389)

#Positive correlation but negative coefficient in MLM model. 
cor.test(TEMP3$ext_c, TEMP3$swl_tot)

```
